
## #######################################################################################################
## 
## Simulation Replication R command file for:
## Kosuke Imai, Bethany Park, and Kenneth Greene "Using the Predicted Responses from List Experiments as
## Explanatory Variables in Regression Models."
##
## NB: The main replication file for the tables and figures is in this archive as Replication.R
##
## Created 18 October 2014
## 
## Contact Bethany Park <bapark@princeton.edu> with questions
## 
## #######################################################################################################

###### STEPS
## 1 Create simulation functions
## 2 Run simulations (change ssize and sims objects); save draws
## 3 To recreate figure from Imai, Park, and Greene (2014), load our simulation draws
## 4 Create table and figures

###########################################
###### Create simulation functions  #######

ictreg.joint.one <- function(par, data, treat, J, o, outcome.reg = "logistic",
                             x.all, y.all, constrained = FALSE, bayes = FALSE,
                             maxIter = 1000, priorscale = 2.5,...) {

    obs.llik.binom.std <- function(par, J, y, treat, x, o, constrained = FALSE,
                                   bayes = FALSE, priorscale = 2.5, outcome.reg = "logistic") {
        k <- ncol(x) ## number of covariates
        if (constrained == FALSE) { ## unconstrained model
            coef.h <- par[1:(k+1)]
            coef.g <- par[(k+2):(2*k+1)]
            x.h0 <- cbind(x,0)
            x.h1 <- cbind(x,1)
            hX0 <- logistic(x.h0 %*% coef.h) ## z = 0
            hX1 <- logistic(x.h1 %*% coef.h) ## z = 1
            gX <- logistic(x %*% coef.g)
            if (outcome.reg == "logistic") {
                coef.f <- par[(2*k + 2):(3*k + 3)]
                x.fy0 <- cbind(x, as.matrix(y), 0) #where z = 0
                x.fy1 <- cbind(x, as.matrix(y - treat), 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "multinomial") {
                o <- as.numeric(o)
                coef.out <- par[(2*k+2):(5*k+1)]
                coef.f <- t(matrix(coef.out, nrow = nlevels))
                x.fy0 <- cbind(1, x, as.matrix(y), 0) #where z = 0
                x.fy1 <- cbind(1, x, as.matrix(y - treat), 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "linear") {
                coef.f <- par[(2*k + 2):(3*k + 3)]
                sigma <- par[3*k + 4]
                x.fy0 <- cbind(x, as.matrix(y), 0) #where z = 0
                x.fy1 <- cbind(x, as.matrix(y - treat), 1)
                fXy1 <- x.fy1 %*% coef.f
                fXy0 <- x.fy0 %*% coef.f                
            }
        } else{ ## constrained model
            coef.h <- par[1:(k)]
            coef.g <- par[(k+1):(2*k)]
            x.h0 <- cbind(x)
            x.h1 <- cbind(x)
            hX0 <- logistic(x.h0 %*% coef.h)
            hX1 <- logistic(x.h1 %*% coef.h)
            gX <- logistic(x %*% coef.g)
            if(outcome.reg == "logistic") {
                coef.f <- par[(2*k + 1):(3*k + 1)]
                x.fy0 <- cbind(x, 0) #where z = 0
                x.fy1 <- cbind(x, 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "multinomial") {
                o <- as.numeric(o)
                coef.out <- par[(2*k+1):(4*k+3)]
                coef.f <- t(matrix(coef.out, nrow = nlevels))
                x.fy0 <- cbind(1, x, 0) #where z = 0
                x.fy1 <- cbind(1, x, 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "linear") {
                coef.f <- par[(2*k + 1):(3*k + 1)]
                sigma <- par[3*k + 2]
                x.fy0 <- cbind(x, 0) #where z = 0
                x.fy1 <- cbind(x, 1)
                fXy1 <- x.fy1 %*% coef.f
                fXy0 <- x.fy0 %*% coef.f
            }
        }
        if(outcome.reg == "linear") {
            ind10 <- ((treat == 1) & (y == 0))
            ind1J1 <- ((treat == 1) & (y == (J+1)))
            ind1y <- ((treat == 1) & (y > 0) & (y < (J+1)))
            nottreat <- (treat==0)
            if (sum(ind10) > 0) {
                p10 <- sum(log(1-gX[ind10])
                           + dbinom(x = y[ind10], size = J, prob = hX0[ind10], log = TRUE)
                           + dnorm(o[ind10], mean = fXy0[ind10], sd = sigma, log = TRUE))
            } else {
                p10 <- 0
            }
            if (sum(ind1J1) > 0) {
                p1J1 <- sum(log(gX[ind1J1])
                        + dbinom(y[ind1J1] - 1, size = J, prob = hX1[ind1J1], log = TRUE)
                            + dnorm(o[ind1J1], mean = fXy1[ind1J1], sd = sigma, log = TRUE))
            } else {
                p1J1 <- 0
            }
            if (sum(ind1y) > 0) {
                p1y <- sum(log(exp(log(gX[ind1y]) + dbinom(y[ind1y]-1, size = J, prob = hX1[ind1y],log = TRUE)
                                  + dnorm(o[ind1y], mean = fXy1[ind1y], sd = sigma, log = TRUE))
                               + exp(log(1-gX[ind1y]) + dbinom(y[ind1y], size = J, prob = hX0[ind1y], log = TRUE)
                                     + dnorm(o[ind1y], mean = fXy0[ind1y], sd = sigma, log = TRUE))))
            } else {
                p1y <- 0
            }
            if (sum(treat == 0) > 0) {
                p0y <- sum(log(exp(log(gX[nottreat]) + dbinom(y[nottreat], size = J, prob = hX1[nottreat], log = TRUE)
                                   + dnorm(o[nottreat], mean = fXy1[nottreat], sd = sigma, log = TRUE)) 
                               + exp(log(1-gX[nottreat]) + dbinom(y[nottreat], size = J, prob = hX0[nottreat], log = TRUE)
                                     + dnorm(o[nottreat], mean = fXy0[nottreat], sd = sigma, log = TRUE) ) )) ## control group
            } else {
                p0y <- 0
            }
        }
        else if(outcome.reg == "logistic") {
            ind10 <- ((treat == 1) & (y == 0))
            ind1J1 <- ((treat == 1) & (y == (J+1)))
            ind1y <- ((treat == 1) & (y > 0) & (y < (J+1)) & (o == 1))
            nottreat <- (treat==0)
            if (sum(ind10) > 0) {
                p10 <- sum(log(1-gX[ind10])
                           + dbinom(x = y[ind10], size = J, prob = hX0[ind10], log = TRUE)
                           + o[ind10] * log(fXy0[ind10]) + (1 - o[ind10]) * log(1-fXy0[ind10]))
            } else {
                p10 <- 0
            }
            if (sum(ind1J1) > 0) {
                p1J1 <- sum(log(gX[ind1J1])
                            + dbinom(y[ind1J1] - 1, size = J, prob = hX1[ind1J1], log = TRUE)
                            + o[ind1J1] * log(fXy1[ind1J1]) + (1 - o[ind1J1]) * log(1 - fXy1[ind1J1]))
            } else {
                p1J1 <- 0
            }
            if (sum(ind1y) > 0) {
                p1y <- sum(log(exp(log(gX[ind1y]) + dbinom(y[ind1y]-1, size = J, prob = hX1[ind1y],log = TRUE)
                                   + o[ind1y]*log(fXy1[ind1y]) + (1-o[ind1y])*log(1-fXy1[ind1y]))
                               + exp(log(1-gX[ind1y]) + dbinom(y[ind1y], size = J, prob = hX0[ind1y], log = TRUE)
                                     + o[ind1y]*log(fXy0[ind1y]) + (1-o[ind1y])*log(1-fXy0[ind1y]))))
            } else {
                p1y <- 0
            }
            if (sum(treat == 0) > 0) {
                p0y <- sum(log(exp(log(gX[nottreat]) + dbinom(y[nottreat], size = J, prob = hX1[nottreat], log = TRUE)
                                   + o[nottreat]*log(fXy1[nottreat]) + (1-o[nottreat])*log(1-fXy1[nottreat])) 
                               + exp(log(1-gX[!treat]) + dbinom(y[nottreat], size = J, prob = hX0[nottreat], log = TRUE)
                                     + o[nottreat]*log(fXy0[nottreat]) + (1-o[nottreat])*log(1-fXy0[nottreat]) ) )) ## control group
            } else {
                p0y <- 0
            }
        }
        if (bayes == TRUE) {
            pprior <- sum(dcauchy(x = coef.f, scale = rep(priorscale, length(coef.f)), log = TRUE))
        } else {
            pprior <- 0
        }
        return(p10+p1J1+p1y+p0y+pprior)
    }
    
    ## Estep (weights)
    Estep.binom.std <- function(par, J, y, treat, x, o, constrained = FALSE, outcome.reg = "logistic") {
        k <- ncol(x) ## number of covariates
        if (constrained == FALSE) { ## unconstrained model
            coef.h <- par[1:(k+1)]
            coef.g <- par[(k+2):(2*k+1)]
            x.h0 <- cbind(x,0)
            x.h1 <- cbind(x,1)
            hX0 <- logistic(x.h0 %*% coef.h) ## z = 0
            hX1 <- logistic(x.h1 %*% coef.h) ## z = 1
            gX <- logistic(x %*% coef.g)
            if (outcome.reg == "logistic") {
                coef.f <- par[(2*k + 2):(3*k + 3)]
                x.fy0 <- cbind(x, as.matrix(y), 0) #where z = 0
                x.fy1 <- cbind(x, as.matrix(y - treat), 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "multinomial") {
                o <- as.numeric(o)
                coef.out <- par[(2*k+2):(5*k+1)]
                coef.f <- t(matrix(coef.out, nrow = nlevels))
                x.fy0 <- cbind(1, x, as.matrix(y), 0) #where z = 0
                x.fy1 <- cbind(1, x, as.matrix(y - treat), 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "linear") {
                coef.f <- par[(2*k + 2):(3*k + 3)]
                sigma <- par[3*k + 4]
                x.fy0 <- cbind(x, as.matrix(y), 0) #where z = 0
                x.fy1 <- cbind(x, as.matrix(y - treat), 1)
                fXy1 <- x.fy1 %*% coef.f
                fXy0 <- x.fy0 %*% coef.f
            }
        } else { ## constrained model
            coef.h <- par[1:(k)]
            coef.g <- par[(k+1):(2*k)]
            x.h0 <- cbind(x)
            x.h1 <- cbind(x)
            hX0 <- logistic(x.h0 %*% coef.h)
            hX1 <- logistic(x.h1 %*% coef.h)
            gX <- logistic(x %*% coef.g)
            if(outcome.reg == "logistic") {
                coef.f <- par[(2*k + 1):(3*k + 1)]
                x.fy0 <- cbind(x, 0) #where z = 0
                x.fy1 <- cbind(x, 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "multinomial") {
                o <- as.numeric(o)
                coef.out <- par[(2*k+1):(4*k+3)]
                coef.f <- t(matrix(coef.out, nrow = nlevels))
                x.fy0 <- cbind(1, x, 0) #where z = 0
                x.fy1 <- cbind(1, x, 1)
                fXy1 <- logistic(x.fy1 %*% coef.f)
                fXy0 <- logistic(x.fy0 %*% coef.f)
            } else if(outcome.reg == "linear") {
                coef.f <- par[(2*k + 1):(3*k + 1)]
                sigma <- par[3*k + 2]
                x.fy0 <- cbind(x, 0) #where z = 0
                x.fy1 <- cbind(x, 1)
                fXy1 <- x.fy1 %*% coef.f
                fXy0 <- x.fy0 %*% coef.f
            }
        }
        ind <- !((treat == 1) & ((y == 0) | (y == (J+1))))
        w <- rep(NA, length(y))
        if(outcome.reg == "logistic") {        
            w[ind] <- exp(log(o[ind]*fXy1[ind] + (1-o[ind])*(1-fXy1[ind]))
                          + log(gX[ind]) + dbinom((y-treat)[ind], size = J, prob = hX1[ind], log = TRUE)
                          - log(exp(log(o[ind]*fXy1[ind] + (1-o[ind])*(1-fXy1[ind]))+log(gX[ind])
                                    + dbinom((y-treat)[ind], size = J, prob = hX1[ind], log = TRUE))
                                + exp(log(o[ind]*fXy0[ind] + (1-o[ind])*(1-fXy0[ind]))
                                      + log(1-gX[ind])+dbinom(y[ind], size = J, prob = hX0[ind], log = TRUE))))
        }
        else if(outcome.reg == "linear") {
            w[ind] <- exp(dnorm(o[ind], mean = fXy1[ind], sd = sigma, log = TRUE)
                          + log(gX[ind]) + dbinom((y-treat)[ind], size = J, prob = hX1[ind], log = TRUE) 
                          - log(exp(dnorm(o[ind], mean = fXy1[ind], sd = sigma, log = TRUE)
                                    + log(gX[ind]) + dbinom((y-treat)[ind], size = J, prob = hX1[ind], log = TRUE))
                                + exp(dnorm(o[ind], mean = fXy0[ind], sd = sigma, log = TRUE)
                                      + log(1-gX[ind]) + dbinom(y[ind], size = J, prob = hX0[ind], log = TRUE))))

        }
        w[(treat == 1) & (y == 0)] <- 0
        w[(treat == 1) & (y == (J+1))] <- 1        
        return(w)
    }
    
    ## Mstep 1: g (sensitive item)
    
    wlogit.fit.std <- function(y, treat, x, w, par = NULL, constrained = FALSE) {
        nPar <- ncol(x)
        yrep <- rep(c(1,0), each = length(y))
        xrep <- rbind(x, x)
        wrep <- c(w, 1-w)
        yrep <- yrep[wrep > 0]
        xrep <- xrep[wrep > 0, ]
        wrep <- wrep[wrep > 0]
        if(constrained == FALSE) {
            par <- par[(nPar+2):(2*nPar+1)]
        }
        else {
            par <- par[(nPar+1):(2*nPar)]
        }
        return(glm(cbind(yrep, 1-yrep) ~ xrep - 1, weights = wrep, family = binomial(logit), start = par))
    }

    ## Mstep 3: h (non-sensitive)
    wbinomial.fit <- function(y, treat, x, w, par = NULL, constrained = FALSE) {
        nPar <- ncol(x)
        yrep <- c(y - treat, y)
        xrep <- rbind(x, x)
        zrep <- rep(c(1, 0), each = length(y))
        wrep <- c(w, 1-w)
        yrep <- yrep[wrep > 0]
        xrep <- xrep[wrep > 0, ]
        zrep <- zrep[wrep > 0]
        wrep <- wrep[wrep > 0]
        if(constrained == FALSE){
            par <- par[1:(nPar+1)]
            fit <- glm(cbind(yrep, J - yrep) ~ xrep + zrep - 1, weights = wrep, family = binomial(logit), start = par)
        } else {
            par <- par[1:(nPar)]
            fit <- glm(cbind(yrep, J - yrep) ~ xrep - 1, weights = wrep, family = binomial(logit), start = par)
        }
        return(fit)
    }
        
    ## Mstep 2: f (outcome)
    ## par <- c(coef.control.start, coef.treat.start, coef.outcome.start, sigma.start) 
    ## par <- par[(2*k + 2):(3*k + 3)]
    wlogit.fit.outcome <- function(y, treat, x, w, o, par = NULL, constrained = FALSE, bayes = FALSE) {            
        yrep <- c((y - treat), y)
        xrep <- rbind(x, x)
        zrep <- rep(c(1,0), each = length(y))
        wrep <- c(w, 1-w)
        orep <- c(o,o)
        nPar <- ncol(x)
        yrep <- yrep[wrep > 0]
        orep <- orep[wrep > 0]
        xrep <- xrep[wrep > 0, ]
        zrep <- zrep[wrep > 0]
        wrep <- wrep[wrep > 0]
        if(constrained == FALSE){
            if(outcome.reg == "logistic" & bayes == FALSE){
                par <- par[(2*nPar + 2):(3*nPar + 3)]
                fit <- glm(cbind(orep, 1-orep) ~ xrep + yrep + zrep - 1, weights = wrep, family = binomial(logit),
                           start = par)
            } else if(outcome.reg == "logistic" & bayes == TRUE){
                par <- par[(2*nPar + 2):(3*nPar + 3)]
                fit <- bayesglm(cbind(orep, 1-orep) ~ xrep + yrep + zrep - 1,
                                weights = wrep, family = binomial(logit),
                                start = par, prior.scale = priorscale)
            } else if(outcome.reg == "multinomial"){
                fit <- multinom(cbind(orep, 1-orep) ~ xrep + yrep + zrep - 1, weights = wrep, start = par)
            } else if(outcome.reg == "linear"){
                par <- par[(2*nPar + 2):(3*nPar + 3)]
                fit <- lm(orep ~ xrep + yrep + zrep - 1, weights = wrep)
            }
        } else { #constrained
            if(outcome.reg == "logistic" & bayes == FALSE){
                par <- par[(2*nPar + 1):(3*nPar + 1)]
                fit <- glm(cbind(orep, 1-orep) ~ xrep + zrep - 1, weights = wrep, family = binomial(logit), start = par)
            } else if(outcome.reg == "logistic" & bayes == TRUE) {
                par <- par[(2*nPar + 1):(3*nPar + 1)]               
                fit <- bayesglm(cbind(orep, 1-orep) ~ xrep + zrep - 1, weights = wrep, family = binomial(logit),
                                start = par, prior.scale = priorscale)    
            } else if(outcome.reg == "multinomial"){
                fit <- multinom(cbind(orep, 1-orep) ~ xrep + zrep - 1, weights = wrep, start = par)
            } else if(outcome.reg == "linear"){
                par <- par[(2*nPar + 1):(3*nPar + 1)]
                fit <- lm(orep ~ xrep + zrep - 1, weights = wrep)
            }
        }
        return(fit)
    }
    
########################################
########## NOW THE ALGORITHM ###########

    ## start off with an infinitely negative log likelihood
    pllik.const <- -Inf
    counter <- 0
    nPar <- ncol(x.all)
    
    ## calculate the log likelihood at the starting values
    llik.const <- obs.llik.binom.std(par, J = J, y = y.all, treat = treat, o = o,
                                     x = x.all, constrained = constrained,
                                     bayes = bayes, outcome.reg = outcome.reg)
    #print(llik.const)
    while (((llik.const - pllik.const) > 10^(-4)) & (counter < maxIter)) {
        
        w <- Estep.binom.std(par, J, y = y.all, treat = treat, x = x.all, o = o,
                             outcome.reg = outcome.reg, constrained = constrained)
        ## treatment 
        lfit <- wlogit.fit.std(y.all, treat = treat, x.all, w, par = par, constrained = constrained)
        #print(lfit) #problem here?
        ## outcome
        if(outcome.reg == "logistic" | outcome.reg == "linear"){
            outfit <- wlogit.fit.outcome(y.all, treat = treat, x.all, w, o = o, par = par,
                                         constrained = constrained, bayes = bayes)
        } else if(outcome.reg == "multinomial") {
            coef.out <- par[(2*nPar+2):(5*nPar+1)]
            coef.f <- t(matrix(coef.out, nrow = nlevels))
            outfit <- wlogit.fit.outcome(y.all, treat = t, x.all, w,
                                         o = o, par = coef.f)
        }
        #print(outfit)
        ## control 
        fit <- wbinomial.fit(y = y.all, treat = treat, x = x.all, w = w, par = par,
                             constrained = constrained)
        #print(fit)
        outfit.sum <- summary(outfit)
        sigma <- outfit.sum$sigma*sqrt(outfit.sum$df[2]/(sum(outfit$weights)-outfit.sum$df[1]))
        par <- c(coef(fit), coef(lfit), coef(outfit), sigma) #control, treat, outcome, sigma
        pllik.const <- llik.const ## make old llik the new one
        
        ## calculate the new log likelihood 
        llik.const <- obs.llik.binom.std(par, J = J, y = y.all, treat = treat, o = o,
                                         x = x.all, constrained = constrained,
                                         bayes = bayes, outcome.reg = outcome.reg)
        counter <- counter + 1 ## up the counter
        #print(counter)
        
        if (llik.const < pllik.const)
            warning("log-likelihood is not monotonically increasing.")  
        if(counter == (maxIter-1))
            warning("number of iterations exceeded maximum in ML")
        
        cat("llik:", llik.const, "\n")
        cat("llik diff:", llik.const - pllik.const, "\n")
        cat("par:", par, "\n")        
    }

    score <- function(par, J, y, treat, x, o, constrained = TRUE, outcome.reg = "logistic") {
        k <- ncol(x)
        if (constrained == FALSE) { ## unconstrained model
            coef.h <- par[1:(k+1)]
            coef.g <- par[(k+2):(2*k+1)]
            x.h0 <- cbind(x,0)
            x.h1 <- cbind(x,1)
            coef.f <- par[(2*k + 2):(3*k + 3)]
            x.fy0 <- cbind(x, as.matrix(y), 0) #where z = 0
            x.fy1 <- cbind(x, as.matrix(y - treat), 1)
            if (outcome.reg == "linear") {
                sigma <- par[3*k + 4]
            }
        } else { ## constrained model
            k <- ncol(x)
            coef.h <- par[1:(k)]
            coef.g <- par[(k+1):(2*k)]
            x.h0 <- x.h1 <- cbind(x)
            coef.f <- par[(2*k + 1):(3*k + 1)]
            x.fy0 <- cbind(x, 0) #where z = 0
            x.fy1 <- cbind(x, 1)
            if (outcome.reg == "linear") {
                sigma <- par[3*k + 2]
            }
        }
        hX0 <- logistic(x.h0 %*% coef.h) ## z = 0
        hX1 <- logistic(x.h1 %*% coef.h) ## z = 1
        gX <- logistic(x %*% coef.g)
        Xb1 <- x.fy1 %*% coef.f #this is x*beta
        Xb0 <- x.fy0 %*% coef.f
        if (outcome.reg == "logistic") {
            fXy1 <- exp(o * Xb1) / (1 + exp(Xb1)) 
            fXy0 <- exp(o * Xb0) / (1 + exp(Xb0)) 
            fprime1 <- as.vector((2*o - 1) * (exp(Xb1) / (1 + exp(Xb1))^2)) * x.fy1
            fprime0 <- as.vector((2*o - 1) * (exp(Xb0) / (1 + exp(Xb0))^2)) * x.fy0
        } else if (outcome.reg == "linear") {
            fXy1 <- dnorm(o, mean = Xb1, sd = sigma, log = TRUE)
            fXy0 <- dnorm(o, mean = Xb0, sd = sigma, log = TRUE)
            f.partial.beta1 <- as.vector(exp(fXy1) * (1/sigma^2) * (o - Xb1)) * x.fy1 #this is right, 1000 x 4, vector multi
            f.partial.beta0 <- as.vector(exp(fXy0) * (1/sigma^2) * (o - Xb0)) * x.fy0 
            f.partial.sigma1 <- exp(fXy1) * (1/(2*sigma^2)) * (-1 + (1/sigma^2) * (o - Xb1)^2)
            f.partial.sigma0 <- exp(fXy0) * (1/(2*sigma^2)) * (-1 + (1/sigma^2) * (o - Xb0)^2) 
            fprime1 <- cbind(f.partial.beta1, f.partial.sigma1)
            fprime0 <- cbind(f.partial.beta0, f.partial.sigma0)            
        }
        
        ind10 <- ((treat == 1) & (y == 0))
        ind1J1 <- ((treat == 1) & (y == (J+1)))
        ind1y <- ((treat == 1) & (y > 0) & (y < (J+1)))
        nottreat <- !treat       
        gprime <- as.vector(gX) * as.vector((1 - gX)) * x
        
        if (outcome.reg == "logistic") {
            parttheta.1 <- as.vector(1 - fXy0) * x.fy0 
            parttheta.2 <- as.vector(1 - fXy1) * x.fy1 
            parttheta.3 <- (fprime1 * dbinom(y - treat, size = J, prob = hX1, log = FALSE) * as.vector(gX) + fprime0 * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX)) / (as.vector(fXy1) * dbinom(y - treat, size = J, prob = hX1, log = FALSE) * as.vector(gX) + as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX))
            parttheta.4 <- (fprime1 * dbinom(y, size = J, prob = hX1, log = FALSE) * as.vector(gX) + fprime0 * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX)) / (as.vector(fXy1) * dbinom(y, size = J, prob = hX1, log = FALSE) * as.vector(gX) + as.vector(fXy0)* dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX))
        } else if (outcome.reg == "linear") {
            parttheta.1 <- cbind(as.vector((1/sigma^2) * (o - Xb0)) * x.fy0, (1/(2*sigma^2)) * (-1 + (1/sigma^2) * (o - Xb0)^2))
            parttheta.2 <- cbind(as.vector((1/sigma^2) * (o - Xb1)) * x.fy1, (1/(2*sigma^2)) * (-1 + (1/sigma^2) * (o - Xb1)^2))
            parttheta.3 <- (fprime1 * exp(dbinom(y - treat, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + fprime0 * exp(dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX)))) / ((exp(as.vector(fXy1) + dbinom(y - treat, size = J, prob = hX1, log = TRUE) + log(as.vector(gX)))) + exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX))))

            parttheta.4 <- (fprime1 * exp(dbinom(y, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + fprime0 * exp(dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX)))) / (exp(as.vector(fXy1) + dbinom(y, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX))))
        }
        parttheta.1r <- parttheta.1 * ind10
        parttheta.2r <- parttheta.2 * ind1J1
        parttheta.3r <- parttheta.3 * ind1y
        parttheta.4r <- parttheta.4 * nottreat ## Not defined where y = 4
        
        idx <- apply(is.na(parttheta.4r), 1, all)
        if (outcome.reg == "logistic" & constrained == FALSE) {
            cero <- rep(0, ncol(x) + 2)
        } else if (outcome.reg == "logistic" & constrained == TRUE) {
            cero <- rep(0, ncol(x) + 1)
        } else if (outcome.reg == "linear" & constrained == FALSE) {
            cero <- rep(0, ncol(x) + 3)
        } else if (outcome.reg == "linear" & constrained == TRUE){
            cero <- rep(0, ncol(x) + 2)
        }
        parttheta.4r[idx,] <- cero
        
        parttheta <- parttheta.1r + parttheta.2r + parttheta.3r + parttheta.4r
        
        partdel.1 <- -as.vector(gX) * x
        partdel.1r <- partdel.1 * ind10
        
        partdel.2 <- as.vector(1 - gX) * x
        partdel.2r <- partdel.2 * ind1J1

        if (outcome.reg == "logistic") {
            partdel.3 <- (as.vector(fXy1) * dbinom(y - treat, size = J, prob = hX1, log = FALSE) * gprime - as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * gprime) / (as.vector(fXy1) * dbinom(y - treat, size = J, prob = hX1, log = FALSE) * as.vector(gX) + as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX))
            partdel.4 <- (as.vector(fXy1) * dbinom(y, size = J, prob = hX1) * gprime -  as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * gprime) / (as.vector(fXy1) * dbinom(y, size = J, prob = hX1) * as.vector(gX) + as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX))
        } else if (outcome.reg == "linear") {
            partdel.3 <- (exp(as.vector(fXy1) + dbinom(y - treat, size = J, prob = hX1, log = TRUE)) * gprime - exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE)) * gprime) / (exp(as.vector(fXy1) + dbinom(y - treat, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX))))
            partdel.4 <- (exp(as.vector(fXy1) + dbinom(y, size = J, prob = hX1, log = TRUE)) * gprime - exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE)) * gprime) / (exp(as.vector(fXy1) + dbinom(y, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX))))
        }
        partdel.3r <- partdel.3 * ind1y
        partdel.4r <- partdel.4 * nottreat ## Not defined where y = 4
        idx <- apply(is.na(partdel.4r), 1, all)
        cero <- rep(0, ncol(x))  
        partdel.4r[idx,] <- cero
        
        partdel <- partdel.1r + partdel.2r + partdel.3r + partdel.4r
        
        hpyx0 <- as.vector(dbinom(y, size = J, prob = hX0, log = FALSE) * (y - J * hX0)) * x.h0 #h prime y = y, z = 0
        hpy1x1 <- as.vector(dbinom(y - treat, size = J, prob = hX1, log = FALSE) * ((y - treat) - J * hX1)) * x.h1 #hprime y = y-1, z = 1
        hpy1x0 <- as.vector(dbinom(y, size = J, prob = hX1, log = FALSE) * (y - J * hX1)) * x.h1 #hprime y = y, z = 1
                                        #last two are exactly the same thing, but for treat vs. control group
        partpsi.1 <- as.vector(y - J * hX0) * x.h0
        partpsi.1r <- partpsi.1 * ind10
    
        partpsi.2 <- as.vector((y - treat) - J * hX1) * x.h1
        partpsi.2r <- partpsi.2 * ind1J1

        if (outcome.reg == "logistic") {
            partpsi.3 <- (as.vector(fXy1) * hpy1x1 * as.vector(gX) + as.vector(fXy0) * hpyx0 * as.vector(1 - gX)) / (as.vector(fXy1) * dbinom(y - treat, size = J, prob = hX1, log = FALSE) * as.vector(gX) + as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector((1 - gX)))
            partpsi.4 <- (as.vector(fXy1) * hpy1x0 * as.vector(gX) + as.vector(fXy0) * hpyx0 * as.vector(1 - gX)) / (as.vector(fXy1) * dbinom(y, size = J, prob = hX1, log = FALSE) * as.vector(gX) + as.vector(fXy0) * dbinom(y, size = J, prob = hX0, log = FALSE) * as.vector(1 - gX))
        } else if (outcome.reg == "linear") {
            partpsi.3 <- (exp(as.vector(fXy1) + log(as.vector(gX))) * hpy1x1 + hpyx0 * exp(log(as.vector(1 - gX)) + as.vector(fXy0))) / (exp(as.vector(fXy1) + dbinom(y - treat, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector((1 - gX)))))
            partpsi.4 <- (exp(as.vector(fXy1) + log(as.vector(gX))) * hpy1x0 + hpyx0 * exp(log(as.vector(1 - gX)) + as.vector(fXy0))) / (exp(as.vector(fXy1) + dbinom(y, size = J, prob = hX1, log = TRUE) + log(as.vector(gX))) + exp(as.vector(fXy0) + dbinom(y, size = J, prob = hX0, log = TRUE) + log(as.vector(1 - gX))))
        }
        partpsi.3r <- partpsi.3 * ind1y
        partpsi.4r <- partpsi.4 * nottreat ## Not defined where y = 4.
        
        idx <- apply(is.na(partpsi.4r), 1, all)
        if (constrained == FALSE) {
            cero <- rep(0, ncol(x) + 1)
        } else if (constrained == TRUE) {
            cero <- rep(0, ncol(x))
        }
        partpsi.4r[idx,] <- cero
        
        partpsi <- partpsi.1r + partpsi.2r + partpsi.3r + partpsi.4r

        ssize <- nrow(x.all)
        S <- cbind(partpsi, partdel, parttheta) #control, treat, outcome 
        info <- (t(S) %*% S) / ssize
        vcov <- solve(info) / ssize #Hessian / n
        ses <- sqrt(diag(vcov))
        returnses <- list(ses, vcov)
        return(returnses)
    }

    if (outcome.reg == "linear") {
        sigma <- par[3*nPar + 4]
    } else {
        sigma <- NA
    }
    ses <- score(par = par, J = 3, y = y.all, treat = treat, o = o,
                 x = x.all, outcome.reg = outcome.reg, constrained = constrained)
    
    if(constrained == FALSE) {
        par.control <- par[1:(nPar+1)]
    } else {
        par.control <- par[1:(nPar)]
    }
    
    if(constrained == FALSE) {
        par.treat <- par[(nPar+2):(2*nPar+1)]
    } else {
        par.treat <- par[(nPar+1):(2*nPar)]
    }
    
    if(constrained == FALSE) {
        par.outcome <- par[(2*nPar + 2):(3*nPar + 3)]
    } else {
        par.outcome <- par[(2*nPar + 1):(3*nPar + 1)]
    }
    

    return.object <- list(par, par.control,
                          par.treat, par.outcome, sigma,
                          counter, ses, w)
    names(return.object) <- list("par", "par.control",
                                 "par.treat",
                                 "par.outcome", "sigma", "no.iterations", "ses", "w")
    class(return.object) <- "ictreg.joint.sim"
    return(return.object)
}

#######################################
###### Create ictregj function  #######
####### FOR TWO STEP ESTIMATORS #######
#######################################

ictreg.joint.two <- function(par, treat, J, outcome.reg = "logistic", x.all, y.all, constrained = FALSE,
                             bayes = FALSE, maxIter = 1000, priorscale = 2.5,...) {

    obs.llik.binom.std <- function(par, J, y, treat, x, constrained = FALSE, bayes = FALSE, priorscale = 2.5) {
        k <- ncol(x) ## number of covariates
        if (constrained == FALSE) { ## unconstrained model
            coef.h <- par[1:(k+1)]
            coef.g <- par[(k+2):(2*k+1)]
            x.h0 <- cbind(x,0)
            x.h1 <- cbind(x,1)
            hX0 <- logistic(x.h0 %*% coef.h) ## z = 0
            hX1 <- logistic(x.h1 %*% coef.h) ## z = 1
            gX <- logistic(x %*% coef.g)
        } else{ ## constrained model
            coef.h <- par[1:(k)]
            coef.g <- par[(k+1):(2*k)]
            x.h0 <- cbind(x)
            x.h1 <- cbind(x)
            hX0 <- logistic(x.h0 %*% coef.h)
            hX1 <- logistic(x.h1 %*% coef.h)
            gX <- logistic(x %*% coef.g)
        }
        ind10 <- ((treat == 1) & (y == 0))
        ind1J1 <- ((treat == 1) & (y == (J+1)))
        ind1y <- ((treat == 1) & (y > 0) & (y < (J+1)))
        nottreat <- (treat==0)
        if (sum(ind10) > 0) {
            p10 <- sum(log(1-gX[ind10])
                       + dbinom(x = y[ind10], size = J, prob = hX0[ind10], log = TRUE))
        } else {
            p10 <- 0
        }
        if (sum(ind1J1) > 0) {
            p1J1 <- sum(log(gX[ind1J1])
                        + dbinom(y[ind1J1] - 1, size = J, prob = hX1[ind1J1], log = TRUE))
        } else {
            p1J1 <- 0
        }
        if (sum(ind1y) > 0) {
            p1y <- sum(log(exp(log(gX[ind1y]) + dbinom(y[ind1y]-1, size = J, prob = hX1[ind1y],log = TRUE))
                           + exp(log(1-gX[ind1y]) + dbinom(y[ind1y], size = J, prob = hX0[ind1y], log = TRUE))))
        } else {
            p1y <- 0
        }
        if (sum(treat == 0) > 0) {
            p0y <- sum(log(exp(log(gX[nottreat]) + dbinom(y[nottreat], size = J, prob = hX1[nottreat], log = TRUE)) 
                           + exp(log(1-gX[!treat]) + dbinom(y[nottreat], size = J, prob = hX0[nottreat], log = TRUE)))) 
        } else {
            p0y <- 0
        }
        if (bayes == TRUE) {
            pprior <- sum(dcauchy(x = coef.f, scale = rep(priorscale, length(coef.f)), log = TRUE))
        } else {
            pprior <- 0
        }
        return(p10+p1J1+p1y+p0y+pprior)
    }
    
    ## Estep (weights)
    Estep.binom.std <- function(par, J, y, treat, x, constrained = FALSE) {
        k <- ncol(x) ## number of covariates
        if(constrained == FALSE){ ## unconstrained model
            coef.h <- par[1:(k+1)]
            coef.g <- par[(k+2):(2*k+1)]
            x.h0 <- cbind(x,0)
            x.h1 <- cbind(x,1)
            hX0 <- logistic(x.h0 %*% coef.h) ## z = 0
            hX1 <- logistic(x.h1 %*% coef.h) ## z = 1
            gX <- logistic(x %*% coef.g)
        } else{ ## constrained model
            coef.h <- par[1:(k)]
            coef.g <- par[(k+1):(2*k)] 
            x.h0 <- cbind(x)
            x.h1 <- cbind(x)
            hX0 <- logistic(x.h0 %*% coef.h) 
            hX1 <- logistic(x.h1 %*% coef.h)   
            gX <- logistic(x %*% coef.g)
        }  
        ind <- !((treat == 1) & ((y == 0) | (y == (J+1))))
        w <- rep(NA, length(y))
        w[ind] <- exp(log(gX[ind]) + dbinom((y-treat)[ind], size = J, prob = hX1[ind], log = TRUE) 
                      - log(exp(log(gX[ind]) + dbinom((y-treat)[ind], size = J, prob = hX1[ind], log = TRUE))
                            + exp(log(1-gX[ind]) + dbinom(y[ind], size = J, prob = hX0[ind], log = TRUE))))
        w[(treat == 1) & (y == 0)] <- 0
        w[(treat == 1) & (y == (J+1))] <- 1
        return(w)
    }
       
    ## Mstep 1: g (sensitive item)
    
    wlogit.fit.std <- function(y, treat, x, w, par = NULL, constrained = FALSE) {
        nPar <- ncol(x)
        if(constrained == FALSE) {
            par <- par[(nPar+2):(2*nPar+1)]
        }
        else {
            par <- par[(nPar+1):(2*nPar)]
        }
        yrep <- rep(c(1,0), each = length(y))
        xrep <- rbind(x, x)
        wrep <- c(w, 1-w)
        yrep <- yrep[wrep > 0]
        xrep <- xrep[wrep > 0, ]
        wrep <- wrep[wrep > 0]
        return(glm(cbind(yrep, 1-yrep) ~ xrep - 1, weights = wrep, family = binomial(logit), start = par))
    }
        
    ## Mstep 3: h (non-sensitive)
    wbinomial.fit <- function(y, treat, x, w, par = NULL, constrained = FALSE) {
        nPar <- ncol(x)
        yrep <- c(y - treat, y)
        xrep <- rbind(x, x)
        zrep <- rep(c(1, 0), each = length(y))
        wrep <- c(w, 1-w)
        yrep <- yrep[wrep > 0]
        xrep <- xrep[wrep > 0, ]
        zrep <- zrep[wrep > 0]
        wrep <- wrep[wrep > 0]
        if(constrained == FALSE){
            par <- par[1:(nPar+1)]
            fit <- glm(cbind(yrep, J - yrep) ~ xrep + zrep - 1, weights = wrep, family = binomial(logit), start = par)
        } else {
            par <- par[1:(nPar)]
            fit <- glm(cbind(yrep, J - yrep) ~ xrep - 1, weights = wrep, family = binomial(logit), start = par)
        }
        return(fit)
    }
    
########################################
########## NOW THE ALGORITHM ###########

    ## start off with an infinitely negative log likelihood
    pllik.const <- -Inf
    counter <- 0
    nPar <- ncol(x.all)
    
    ## calculate the log likelihood at the starting values
    llik.const <- obs.llik.binom.std(par, J = J, y = y.all, treat = t,
                                     x = x.all, constrained = constrained, bayes = bayes)
    
    
    while (((llik.const - pllik.const) > 10^(-4)) & (counter < maxIter)) { ## difference is bigger than an arbitrarily small number  
        w <- Estep.binom.std(par, J, y.all, treat = t, x.all)
        
        ## treatment 
        lfit <- wlogit.fit.std(y.all, treat = t, x.all, w, par = par) 
               
        ## control 
        fit <- wbinomial.fit(y = y.all, treat = t, x = x.all, w = w, par = par,
                             constrained = constrained)
        
        par <- c(coef(fit), coef(lfit))
        
        pllik.const <- llik.const ## make old llik the new one
        
        ## calculate the new log likelihood
        llik.const <- obs.llik.binom.std(par, J = J, y = y.all, treat = t,
                                         x = x.all, constrained = constrained,
                                         bayes = bayes)
        
        counter <- counter + 1 ## up the counter
        
        if (llik.const < pllik.const)
            warning("log-likelihood is not monotonically increasing.")  
        if(counter == (maxIter-1))
            warning("number of iterations exceeded maximum in ML")
        
        cat("llik:", llik.const, "\n")
        cat("llik diff:", llik.const - pllik.const, "\n")
        cat("par:", par, "\n")
        
    } ## end of while loop
    
    if(constrained == FALSE) {
        par.control <- par[1:(nPar+1)]
    } else {
        par.control <- par[1:(nPar)]
    }
    
    if(constrained == FALSE) {
        par.treat <- par[(nPar+2):(2*nPar+1)]
    } else {
        par.treat <- par[(nPar+1):(2*nPar)]
    }

    MLEfit <- optim(par, obs.llik.binom.std, method = "BFGS", J = J, y = y.all,
                    treat = t, x = x.all, hessian = TRUE, control = list(maxit = 0))
    print(MLEfit)
    vcov.mle <- solve(-MLEfit$hessian)
    print(vcov.mle)
    se.mle <- sqrt(diag(vcov.mle))

    ## Predicted values
    preds.z <- logistic(x.all %*% par.treat)
    preds.y0 <- (J * logistic(cbind(x.all, 1) %*% par.control) * preds.z) +
        (J * logistic(cbind(x.all, 0) %*% par.control) * (1-preds.z))
    
    return.object <- list(par, par.control, par.treat,
                          counter, preds.z, preds.y0,
                          y.all, treat, x.all, w, vcov = vcov.mle, se = se.mle)
    names(return.object) <- list("par", "par.control", "par.treat", "no.iterations",
                                 "predicted.values.z", "predicted.values.y0", "y", "treat", "x", "w")
    class(return.object) <- "ictreg.joint.sim"
    return.object
}

wlogit.fit.outcome2 <- function(y, treat, x, w, o, par = NULL, constrained = FALSE, bayes = FALSE, outcome.reg = "logistic") {   
    yrep <- c((y - treat), y)
    xrep <- rbind(x, x)
    zrep <- rep(c(1,0), each = length(y))
    wrep <- c(w, 1-w)
    orep <- c(o,o)
    nPar <- ncol(x)
    yrep <- yrep[wrep > 0]
    orep <- orep[wrep > 0]
    xrep <- xrep[wrep > 0, ]
    zrep <- zrep[wrep > 0]
    wrep <- wrep[wrep > 0]
    if(constrained == FALSE){
        if(outcome.reg == "logistic" & bayes == FALSE){
            par <- par[(2*nPar + 2):(3*nPar + 3)]
            fit <- glm(cbind(orep, 1-orep) ~ xrep + yrep + zrep - 1, weights = wrep, family = quasibinomial(logit),
                       start = par)
        } else if(outcome.reg == "logistic" & bayes == TRUE){
            par <- par[(2*nPar + 2):(3*nPar + 3)]
            fit <- bayesglm(cbind(orep, 1-orep) ~ xrep + yrep + zrep - 1,
                            weights = wrep, family = binomial(logit),
                            start = par, prior.scale = priorscale)
        } else if(outcome.reg == "multinomial"){
            fit <- multinom(cbind(orep, 1-orep) ~ xrep + yrep + zrep - 1, weights = wrep, start = par)
        } else if(outcome.reg == "linear"){
            par <- par[(2*nPar + 2):(3*nPar + 3)]
            fit <- lm(orep ~ xrep + yrep + zrep - 1, weights = wrep)
        }
    } else { #constrained
        if(outcome.reg == "logistic" & bayes == FALSE){
            par <- par[(2*nPar + 1):(3*nPar + 1)]
            fit <- glm(cbind(orep, 1-orep) ~ xrep + zrep - 1, weights = wrep, family = quasibinomial(logit), start = par)
        } else if(outcome.reg == "logistic" & bayes == TRUE) {
            par <- par[(2*nPar + 1):(3*nPar + 1)]               
            fit <- bayesglm(cbind(orep, 1-orep) ~ xrep + zrep - 1, weights = wrep, family = binomial(logit),
                            start = par, prior.scale = priorscale)    
        } else if(outcome.reg == "multinomial"){
            fit <- multinom(cbind(orep, 1-orep) ~ xrep + zrep - 1, weights = wrep, start = par)
        } else if(outcome.reg == "linear"){
            par <- par[(2*nPar + 1):(3*nPar + 1)]
            fit <- lm(orep ~ xrep + yrep + zrep - 1, weights = wrep)
        }
        }
    return(fit)
}

###
### standard error for two-step estimator
###


twostep.vcov <- function(par, J, y, treat, x, o, w, first.stage.vcov = NULL, constrained = TRUE) {
    k <- ncol(x)
    if (constrained == FALSE) { ## unconstrained model
        coef.h <- par[1:(k+1)]
        coef.g <- par[(k+2):(2*k+1)]
        x.h0 <- cbind(x,0)
        x.h1 <- cbind(x,1)
        coef.f <- par[(2*k + 2):length(par)]
        x.fy0 <- cbind(x, as.matrix(y), 0) #where z = 0
        x.fy1 <- cbind(x, as.matrix(y - treat), 1)
    } else { ## constrained model
        k <- ncol(x)
        coef.h <- par[1:(k)]
        coef.g <- par[(k+1):(2*k)]
        x.h0 <- x.h1 <- cbind(x)
        coef.f <- par[(2*k + 1):length(par)]
        x.fy0 <- cbind(x, 0) #where z = 0
        x.fy1 <- cbind(x, 1)
    }
    hX0 <- logistic(x.h0 %*% coef.h) ## z = 0
    hX1 <- logistic(x.h1 %*% coef.h) ## z = 1
    gX <- as.vector(logistic(x %*% coef.g))

    ind10 <- ((treat == 1) & (y == 0))
    ind1J1 <- ((treat == 1) & (y == (J+1)))
    ind1y <- ((treat == 1) & (y > 0) & (y < (J+1)))
    nottreat <- !treat       
    gprime <- gX * (1 - gX) * x

    ## partial with respect to delta
    partdel.1 <- -gX * x * ind10      
    partdel.2 <- (1 - gX) * x * ind1J1
    partdel.3 <- (dbinom(y - treat, size = J, prob = hX1, log = FALSE) - dbinom(y, size = J, prob = hX0, log = FALSE)) * gprime / (dbinom(y - treat, size = J, prob = hX1, log = FALSE) * gX + dbinom(y, size = J, prob = hX0, log = FALSE) * (1 - gX))
    partdel.4 <- (dbinom(y, size = J, prob = hX1) -  dbinom(y, size = J, prob = hX0, log = FALSE)) * gprime / (dbinom(y, size = J, prob = hX1) * gX + dbinom(y, size = J, prob = hX0, log = FALSE) * (1 - gX))
    partdel.3 <- partdel.3 * ind1y
    partdel.4 <- partdel.4 * nottreat ## Not defined where y = 4
    idx <- apply(is.na(partdel.4), 1, all)
    partdel.4[idx,] <- 0
    
    partdel <- partdel.1 + partdel.2 + partdel.3 + partdel.4

    ## partial with respect to psi
    hpyx0 <- as.vector(dbinom(y, size = J, prob = hX0, log = FALSE) * (y - J * hX0)) * x.h0 #h prime y = y, z = 0
    hpy1x1 <- as.vector(dbinom(y - treat, size = J, prob = hX1, log = FALSE) * ((y - treat) - J * hX1)) * x.h1 #hprime y = y-1, z = 1
    hpy1x0 <- as.vector(dbinom(y, size = J, prob = hX1, log = FALSE) * (y - J * hX1)) * x.h1 #hprime y = y, z = 1
  
    partpsi.1 <- as.vector(y - J * hX0) * x.h0 * ind10
    partpsi.2 <- as.vector((y - treat) - J * hX1) * x.h1 * ind1J1
    partpsi.3 <- (hpy1x1 * gX + hpyx0 * as.vector(1 - gX)) / (dbinom(y - treat, size = J, prob = hX1, log = FALSE) * gX + dbinom(y, size = J, prob = hX0, log = FALSE) * (1 - gX))
    partpsi.4 <- (hpy1x0 * gX + hpyx0 * as.vector(1 - gX)) / (dbinom(y, size = J, prob = hX1, log = FALSE) * gX + dbinom(y, size = J, prob = hX0, log = FALSE) * (1 - gX))
    partpsi.3 <- partpsi.3 * ind1y
    partpsi.4 <- partpsi.4 * nottreat ## Not defined where y = 4.
        
    idx <- apply(is.na(partpsi.4), 1, all)
    partpsi.4[idx,] <- 0
        
    partpsi <- partpsi.1 + partpsi.2 + partpsi.3 + partpsi.4

    ## F1, G1
    ssize <- nrow(x.all)
    F1 <- crossprod(cbind(partpsi, partdel)) / ssize
    if (is.null(first.stage.vcov)) {
        G1 <- F1 
    } else {
        G1 <- first.stage.vcov * ssize
    }
    
    ## F2 
    x.star <- cbind(x, w, y - treat * w)
    m0 <- - as.vector(o - x.star %*% coef.f) * x.star
    F2 <- crossprod(m0) / ssize

    ## F
    F <- rbind(cbind(F1, matrix(0, ncol = ncol(F2), nrow = nrow(F1))),
               cbind(matrix(0, ncol = ncol(F1), nrow = nrow(F2)), F2))
    
    ## G3
    G3 <- crossprod(x.star) / ssize
    
    ## partial of w with respect to psi and delta
    w.psi <- (hpy1x1 * dbinom(y, size = J, prob = hX0, log = FALSE) - dbinom(y - treat, size = J, prob = hX1, log = FALSE) * hpyx0) * gX * (1-gX) / (dbinom(y - treat, size = J, prob = hX1, log = FALSE) * gX + dbinom(y, size = J, prob = hX0, log = FALSE) * (1 - gX))^2
    w.delta <- dbinom(y - treat, size = J, prob = hX1, log = FALSE) * dbinom(y, size = J, prob = hX0, log = FALSE) * gprime / (dbinom(y - treat, size = J, prob = hX1, log = FALSE) * gX + dbinom(y, size = J, prob = hX0, log = FALSE) * (1 - gX))^2
    
    ## G2
    G2 <- matrix(0, ncol = ncol(G1), nrow = nrow(G3)) 
    
    partial.m0.psi <- array(0, dim = c(ssize, length(coef.h), ncol(x.star)))
    partial.m0.psi[,,ncol(x)+1] <- as.vector(o - x.star %*% coef.f) * w.psi
    partial.m0.psi[,,ncol(x)+2] <- - as.vector(o - x.star %*% coef.f) * treat * w.psi
    
    tmp <- as.vector(coef.f[length(coef.f)-1] - coef.f[length(coef.f)] * treat) * w.psi
    for (i in 1:ssize) {
        G2[, 1:dim(partial.m0.psi)[2]] <- G2[, 1:dim(partial.m0.psi)[2]] +
            t(partial.m0.psi[i,,] - matrix(tmp[i, ], ncol = 1) %*% matrix(x.star[i, ], nrow = 1))
    }
    
    partial.m0.delta <- array(0, dim = c(ssize, length(coef.g), ncol(x.star)))
    partial.m0.delta[,,ncol(x)+1] <- as.vector(o - x.star %*% coef.f) * w.delta
    partial.m0.delta[,,ncol(x)+2] <- - as.vector(o - x.star %*% coef.f) * treat * w.delta

    tmp <- as.vector(coef.f[length(coef.f)-1] - coef.f[length(coef.f)] * treat) * w.delta
    for (i in 1:ssize) {
        G2[, (dim(partial.m0.psi)[2]+1):ncol(G2)] <- G2[, (dim(partial.m0.psi)[2]+1):ncol(G2)] +
            t(partial.m0.delta[i,,] - matrix(tmp[i, ], ncol = 1) %*% matrix(x.star[i, ], nrow = 1))
    }
    G2 <- G2/ssize
    
    ## G
    G <- cbind(rbind(G1, G2), rbind(matrix(0, ncol = ncol(G3), nrow = nrow(G1)), G3))

    ## var
    V <- solve(G) %*% F %*% t(solve(G))

    return(V/ssize)
    
}

###########################################
############# Run simulations #############

library("MASS")
set.seed(2) 
logistic <- function(x) exp(x)/(1+exp(x))
J <- 3

## Set parameters
true.treat <- c(0.5, 1) ## x1, x2
true.control <- c(1, 1, 1) ## x1, x2, z
true.outcome <- c(-1, 1, 1, 0.5) ## x1, x2, Y0, z

true.sigma <- 0.5
truth <- c(true.control, true.treat, true.outcome, true.sigma)
truth.s1 <- c(true.control, true.treat)
ssize <- 1500 
cores <- 1
sims <- 1
boot <- 1
returnlist <- list()#ultimate return object -- created outside the foreach loop

sumpars <- foreach(j = 1:cores) %do% {
    onestep.means <- matrix(NA, nrow = sims, ncol = 10) #creates a new holder in each core. rows are sims, columns are parameters
    twostep.means <- matrix(NA, nrow = sims, ncol = 9)
    naive.means <- matrix(NA, nrow = sims, ncol = 4)

    ses.onestep <- matrix(NA, nrow = sims, ncol = 10)
    ses.twostep <- matrix(NA, nrow = sims, ncol = 9)
    ses.naive <- matrix(NA, nrow = sims, ncol = 4)
    
    onestep.sd <- matrix(NA, nrow = sims, ncol = 10) #creates a new holder in each core. rows are sims, columns are parameters
    twostep.sd <- matrix(NA, nrow = sims, ncol = 9)
    naive.sd <- matrix(NA, nrow = sims, ncol = 4)
    
    onestep.lower <- matrix(NA, nrow = sims, ncol = 10) #creates a new holder in each core. rows are sims, columns are parameters
    twostep.lower <- matrix(NA, nrow = sims, ncol = 9)
    naive.lower <- matrix(NA, nrow = sims, ncol = 4)
    
    onestep.upper <- matrix(NA, nrow = sims, ncol = 10) #creates a new holder in each core. rows are sims, columns are parameters
    twostep.upper <- matrix(NA, nrow = sims, ncol = 9)
    naive.upper <- matrix(NA, nrow = sims, ncol = 4)
    
    #firststep.treat <- matrix(NA, nrow = sims, ncol = 2)
    #firststep.control <- matrix(NA, nrow = sims, ncol = 3)

    library(magic)
    library(MASS)
    library(mvtnorm)
    for(n in 1:sims){ #create 125 data frames in each core; perform 1000 BS on each
        dat1 <- 1
        dat2 <- mvrnorm(ssize, mu = -1, Sigma = 1)
        dat <- as.data.frame(cbind(dat1, dat2))
        
        ## treatment group
        gX <- logistic(as.matrix(dat) %*% true.treat)
        
        ## control group
        z <- rbinom(ssize, size = 1, prob = gX)
        dat <- cbind(dat, z)
        hX <- logistic(as.matrix(dat) %*% true.control)
        dat$Y0 <- rbinom(ssize, size = J, prob = hX)
        
        ## Treat
        dat$treat <- sample(rep(c(0,1), ssize/2), size = ssize, replace = FALSE)
        
        ## Y
        dat$y[dat$treat == 1] <- dat$Y0[dat$treat == 1] + z[dat$treat == 1]
        dat$y[dat$treat == 0] <- dat$Y0[dat$treat == 0]
        
        ## Outcome 
        cols <- c("dat1", "V2", "Y0", "z")        
        fX <- as.matrix(dat[,cols]) %*% true.outcome        
        dat$o <- rnorm(ssize, mean = fX, sd = true.sigma)

        ## identify treatment, outcome, covariates, observed response (y.all), J
        x.all <- as.matrix(cbind(dat$dat1, dat$V2))
        treat <- dat$treat
        t <- dat$treat
        y.all <- dat$y
        J <- 3
        o <- dat$o
        
        ## starting values
        dat.treat <- dat[dat$treat == 1,]
        dat.control <- dat[dat$treat == 0,]
        start.fit.control <- lm(y ~ dat1 + V2 - 1, data = dat.control)
        start.fit.treat <- lm(y ~ dat1 + V2 - 1, data = dat.treat)
        start.fit.outcome <- lm(o ~ dat1 + V2 + Y0 - 1,
                                data = dat)
       
        coef.z.start <- 0.75
        coef.control.start <- c(start.fit.control$coefficients, coef.z.start)
        coef.treat.start <- start.fit.treat$coefficients
        par <- c(coef.control.start, coef.treat.start) #two step estimators run ictreg function
                                                       #without outcome model in the first step

        sigma.start <- sigma <- summary(start.fit.outcome)$sigma
        coef.outcome.start <- c(start.fit.outcome$coefficients, coef.z.start)
        par.onestep <- c(coef.control.start, coef.treat.start, coef.outcome.start, sigma.start) #one step estimator runs
                                                                                             #with all three models at once
    
## Set up to bootstrap if desired, but does NOT bootstrap when boot = 1, as now

        sq <- seq(from = 1, to = nrow(dat), by = 1)
        choose <- list()
        twostep <- c()
        onestep <- c()
        naive <- c()
        onestepses <- c()
        twostepses <- c()
        naiveses <- c()

        for(i in 1:boot) { # still within one of the 125 datasets, bootstrap WITHIN that dataset
            choose[[i]] <- sample(sq, size = length(sq), replace = TRUE)
            bootdat <- dat[choose[[i]],]

            bootdat <- dat

            x.all <- as.matrix(cbind(bootdat$dat1, bootdat$V2))
            treat <- bootdat$treat
            t <- bootdat$treat
            y.all <- bootdat$y
            J <- 3
            o <- bootdat$o

            ## For each bootstrap dataset, run each estimator
            
            ## One step estimator
            step1.1 <- ictreg.joint.one(par = par.onestep, treat = t, y.all = y.all,
                                  J = 3, x.all = x.all, constrained = FALSE,
                                  bayes = F, maxIter = 1000, outcome.reg = "linear", o = o)
            onestep <- rbind(onestep, step1.1$par)
            onestepses <- rbind(onestepses, step1.1$ses[[1]])
            
            ## Step 1       
            step1 <- ictreg.joint.two(par = par, treat = t, y.all = y.all,
                                      J = 3, x.all = x.all, constrained = FALSE,
                                      bayes = F, maxIter = 1000)

            ## Second step
            sensitive <- step1$w
            control <- y.all - t * sensitive
            step2 <- lm(o ~ -1 + x.all + control + sensitive)
            twostep.temp <- c(step1$par[1:(2*ncol(x.all) + 1)], coef(step2))
            twostep <- rbind(twostep, c(step1$par[1:(2*ncol(x.all) + 1)], coef(step2)))
            V <- twostep.vcov(twostep.temp, J, y.all, t, x.all, o, step1$w, constrained = FALSE)
            twostepses <- rbind(twostepses, sqrt(diag(V)))

            ## Naive second step
            naive.lm <- lm(o ~ -1 + x.all + step1$predicted.values.y0 + step1$predicted.values.z)
            naiveses <- rbind(naiveses, coef(summary(naive.lm))[, 2])
            naive <- rbind(naive, coef(summary(naive.lm))[, 1])
            
        } ## End of bootstrap. Have three matrices of parameter estimates (one for each estimator), boots x par
        
        lower.twostep <- apply(twostep, 2, quantile, probs = 0.05)
        upper.twostep <- apply(twostep, 2, quantile, probs = 0.95)
        sd.twostep <- apply(twostep, 2, sd)
        means.twostep <- apply(twostep, 2, mean)
        twostep.ses <- apply(twostepses, 2, mean)
        ci.twostep <- cbind(lower.twostep, upper.twostep,
                             sd.twostep, means.twostep, twostep.ses)

        lower.onestep <- apply(onestep, 2, quantile, probs = 0.05)
        upper.onestep <- apply(onestep, 2, quantile, probs = 0.95)
        sd.onestep <- apply(onestep, 2, sd)
        means.onestep <- apply(onestep, 2, mean)
        onestep.ses <- apply(onestepses, 2, mean)
        ci.onestep <- cbind(lower.onestep, upper.onestep,
                             sd.onestep, means.onestep, onestep.ses)

        lower.naive <- apply(naive, 2, quantile, probs = 0.05)
        upper.naive <- apply(naive, 2, quantile, probs = 0.95)
        sd.naive <- apply(naive, 2, sd)
        means.naive <- apply(naive, 2, mean) 
        naive.ses <- apply(naiveses, 2, mean)
        ci.naive <- cbind(lower.naive, upper.naive,
                             sd.naive, means.naive, naive.ses)
        
        onestep.means[n,] <- ci.onestep[,4] 
        twostep.means[n,] <- ci.twostep[,4]
        naive.means[n,] <- ci.naive[,4]
        onestep.sd[n,] <- ci.onestep[,3] 
        twostep.sd[n,] <- ci.twostep[,3]
        naive.sd[n,] <- ci.naive[,3]
        onestep.lower[n,] <- ci.onestep[,1] 
        twostep.lower[n,] <- ci.twostep[,1]
        naive.lower[n,] <- ci.naive[,1]
        onestep.upper[n,] <- ci.onestep[,2] 
        twostep.upper[n,] <- ci.twostep[,2]
        naive.upper[n,] <- ci.naive[,2]
        ses.onestep[n,] <- ci.onestep[,5]
        ses.twostep[n,] <- ci.twostep[,5]
        ses.naive[n,] <- ci.naive[,5]

        #firststep.treat[n,] <- treat.twostep2
        #firststep.control[n,] <- control.twostep2
        
        returnlist <- list(onestep.means, twostep.means, naive.means,
                           onestep.sd, twostep.sd, naive.sd,
                           onestep.lower, twostep.lower, naive.lower,
                           onestep.upper, twostep.upper, naive.upper,
                           ses.onestep, ses.twostep, ses.naive)
        
         names(returnlist) <- c("onestep.means", "twostep.means", "naive.means",
                                "onestep.sd", "twostep.sd", "naive.sd",
                                "onestep.lower", "twostep.lower", "naive.lower",
                                "onestep.upper", "twostep.upper", "naive.upper",
                                "ses.onestep", "ses.twostep", "ses.naive")
                    
    } # END SIMS
    return(returnlist) # outside list is cores, inside elements are named. Rows of the individual elements are simulations; colums are parameters.
    
}# END FOREACH LOOP (cores come back together)

############################################
####### Load our draws & make tables #######

load("ss1000.RData")

mean.par.onestep <- apply(ss1000$onestep.means, 2, mean)
mean.par.twostep2pars <- apply(ss1000$twostep.means, 2, mean)
mean.par.naivepars <- apply(ss1000$naive.means, 2, mean)

median.par.naivepars <- apply(ss1000$naive.means, 2, median)
bias.naive.median <- abs(median.par.naivepars - true.outcome)

median.par.onestep <- apply(ss1000$onestep.means, 2, median)
bias.onestep.median <- abs(median.par.onestep - true.outcome)

median.par.twostep2pars <- apply(ss1000$twostep.means, 2, median)
bias.twostep2pars.median <- abs(median.par.twostep2pars - true.outcome)

subtract <- t(t(ss1000$onestep.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.onestep <- sqrt(apply(subtract.sq, 2, mean))

subtract <- t(t(ss1000$twostep.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.twostep2pars <- sqrt(apply(subtract.sq, 2, mean))

subtract <- t(t(ss1000$naive.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.naive <- sqrt(apply(subtract.sq, 2, mean))

rmse <- rbind(rmse.onestep, rmse.twostep2pars, rmse.naive)
bias <- rbind(bias.onestep.median, bias.twostep2pars.median, bias.naive.median)
means <- rbind(mean.par.onestep, mean.par.twostep2pars, mean.par.naivepars)

table1000 <- rbind(means, bias, rmse)
rownames <- c("One Step Mean", "Two Step Mean", "Naive Mean", "One Step Median Deviation", "Two Step Median Deviation", "Naive Median Deviation", "One Step RMSE", "Two Step RMSE", "Naive RMSE")
colnames(table1000) <- c("X1 Outcome", "X2 Outcome", "Y0 Outcome", "Z Outcome")
rownames(table1000) <- rownames
table1000

load("ss1500.RData")

mean.par.onestep <- apply(ss1500$onestep.means, 2, mean)
mean.par.twostep2pars <- apply(ss1500$twostep.means, 2, mean)
mean.par.naivepars <- apply(ss1500$naive.means, 2, mean)

median.par.naivepars <- apply(ss1500$naive.means, 2, median)
bias.naive.median <- abs(median.par.naivepars - true.outcome)

median.par.onestep <- apply(ss1500$onestep.means, 2, median)
bias.onestep.median <- abs(median.par.onestep - true.outcome)

median.par.twostep2pars <- apply(ss1500$twostep.means, 2, median)
bias.twostep2pars.median <- abs(median.par.twostep2pars - true.outcome)

subtract <- t(t(ss1500$onestep.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.onestep <- sqrt(apply(subtract.sq, 2, mean))

subtract <- t(t(ss1500$twostep.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.twostep2pars <- sqrt(apply(subtract.sq, 2, mean))

subtract <- t(t(ss1500$naive.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.naive <- sqrt(apply(subtract.sq, 2, mean))

rmse <- rbind(rmse.onestep, rmse.twostep2pars, rmse.naive)
bias <- rbind(bias.onestep.median, bias.twostep2pars.median, bias.naive.median)
means <- rbind(mean.par.onestep, mean.par.twostep2pars, mean.par.naivepars)

table1500 <- rbind(means, bias, rmse)
rownames <- c("One Step Mean", "Two Step Mean", "Naive Mean", "One Step Median Deviation", "Two Step Median Deviation", "Naive Median Deviation", "One Step RMSE", "Two Step RMSE", "Naive RMSE")
colnames(table1500) <- c("X1 Outcome", "X2 Outcome", "Y0 Outcome", "Z Outcome")
rownames(table1500) <- rownames
table1500


load("ss2500.RData")

mean.par.onestep <- apply(ss2500$onestep.means, 2, mean)
mean.par.twostep2pars <- apply(ss2500$twostep.means, 2, mean)
mean.par.naivepars <- apply(ss2500$naive.means, 2, mean)

median.par.naivepars <- apply(ss2500$naive.means, 2, median)
bias.naive.median <- abs(median.par.naivepars - true.outcome)

median.par.onestep <- apply(ss2500$onestep.means, 2, median)
bias.onestep.median <- abs(median.par.onestep - true.outcome)

median.par.twostep2pars <- apply(ss2500$twostep.means, 2, median)
bias.twostep2pars.median <- abs(median.par.twostep2pars - true.outcome)

subtract <- t(t(ss2500$onestep.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.onestep <- sqrt(apply(subtract.sq, 2, mean))

subtract <- t(t(ss2500$twostep.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.twostep2pars <- sqrt(apply(subtract.sq, 2, mean))

subtract <- t(t(ss2500$naive.means) - true.outcome)
subtract.sq <- subtract ^ 2
rmse.naive <- sqrt(apply(subtract.sq, 2, mean))

rmse <- rbind(rmse.onestep, rmse.twostep2pars, rmse.naive)
bias <- rbind(bias.onestep.median, bias.twostep2pars.median, bias.naive.median)
means <- rbind(mean.par.onestep, mean.par.twostep2pars, mean.par.naivepars)

table2500 <- rbind(means, bias, rmse)
rownames <- c("One Step Mean", "Two Step Mean", "Naive Mean", "One Step Median Deviation", "Two Step Median Deviation", "Naive Median Deviation", "One Step RMSE", "Two Step RMSE", "Naive RMSE")
colnames(table2500) <- c("X1 Outcome", "X2 Outcome", "Y0 Outcome", "Z Outcome")
rownames(table2500) <- rownames
table2500


############################################
############### Make Figures ###############
bias.x1.onestep <- c(abs(table1000[4,1]), abs(table1500[4,1]), abs(table2500[4,1]))
rmse.x1.onestep <- (c(table1000[7,1], table1500[7,1], table2500[7,1]))
bias.x1.twostep <- c(abs(table1000[5,1]), abs(table1500[5,1]), abs(table2500[5,1]))
rmse.x1.twostep <- (c(table1000[8,1], table1500[8,1], table2500[8,1]))
bias.x1.naive <- c(abs(table1000[6,1]), abs(table1500[6,1]), abs(table2500[6,1]))
rmse.x1.naive <- (c(table1000[9,1], table1500[9,1], table2500[9,1]))

bias.x2.onestep <- c(abs(table1000[4,2]), abs(table1500[4,2]), abs(table2500[4,2]))
rmse.x2.onestep <- (c(table1000[7,2], table1500[7,2], table2500[7,2]))
bias.x2.twostep <- c(abs(table1000[5,2]), abs(table1500[5,2]), abs(table2500[5,2]))
rmse.x2.twostep <- (c(table1000[8,2], table1500[8,2], table2500[8,2]))
bias.x2.naive <- c(abs(table1000[6,2]), abs(table1500[6,2]), abs(table2500[6,2]))
rmse.x2.naive <- (c(table1000[9,2], table1500[9,2], table2500[9,2]))

bias.y0.onestep <- c(abs(table1000[4,3]), abs(table1500[4,3]), abs(table2500[4,3]))
rmse.y0.onestep <- (c(table1000[7,3], table1500[7,3], table2500[7,3]))
bias.y0.twostep <- c(abs(table1000[5,3]), abs(table1500[5,3]), abs(table2500[5,3]))
rmse.y0.twostep <- (c(table1000[8,3], table1500[8,3], table2500[8,3]))
bias.y0.naive <- c(abs(table1000[6,3]), abs(table1500[6,3]), abs(table2500[6,3]))
rmse.y0.naive <- (c(table1000[9,3], table1500[9,3], table2500[9,3]))

bias.z.onestep <- c(abs(table1000[4,4]), abs(table1500[4,4]), abs(table2500[4,4]))
rmse.z.onestep <- (c(table1000[7,4], table1500[7,4], table2500[7,4]))
bias.z.twostep <- c(abs(table1000[5,4]), abs(table1500[5,4]), abs(table2500[5,4]))
rmse.z.twostep <- (c(table1000[8,4], table1500[8,4], table2500[8,4]))
bias.z.naive <- c(abs(table1000[6,4]), abs(table1500[6,4]), abs(table2500[6,4]))
rmse.z.naive <- (c(table1000[9,4], table1500[9,4], table2500[9,4]))

library("shape")
ssizes <- c(1000, 1500, 2500)
par(mfrow = c(4,2), mar=c(2.4,1.4,2.4,1.4), oma=c(3,3.5,3,0))

plot(ssizes, bias.x1.onestep, pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(0,0.061), ylab = "Bias")
lines(ssizes, bias.x1.twostep, lty = 2, col = "black")
points(ssizes, bias.x1.twostep, pch = 20, col = "black")
lines(ssizes, bias.x1.naive, lty = 4, col = "black")
points(ssizes, bias.x1.naive, pch = 20, col = "black")
abline(h = 0.0, lty = 3, col = "black")
text(x = c(1175, 1150, 1350), y = c(0.05, 0.023, 0.017), col = c("black", "black", "black"), labels = c("Naive two-step", "Two-step", "One-step"))
Arrows(x0 = c(1175, 1350), x1 = c(1175, 1350), y0 = c(0.018, 0.014), y1 = c(0.009,0.003), arr.type = "triangle", arr.length = 0.2)

plot(ssizes, log(rmse.x1.onestep + 0.01), pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(-4.7,1.1), ylab = "", yaxt = "n")
axis(2, at = c(-4.61, -2.81, -1.35, 0.01), labels = c("0", "0.05", "0.25", "1"))
lines(ssizes, log(rmse.x1.twostep + 0.01), col = "black", lty = 2)
points(ssizes, log(rmse.x1.twostep + 0.01), col = "black", pch = 20)
lines(ssizes, log(rmse.x1.naive + 0.01), col = "black", lty = 4)
points(ssizes, log(rmse.x1.naive + 0.01), col = "black", pch = 20)
abline(h = -4.61, lty = 3, col = "black")
text(x = c(1175, 1150, 1140), y = c(0.75, -1.5, -3.5), col = c("black", "black", "black"), labels = c("Naive two-step", "Two-step\n(dashed line)", "One-step\n(solid line)"))
Arrows(x0 = c(1260, 1275), x1 = c(1400, 1400), y0 = c(-2.0, -3.5), y1 = c(-2.45, -3.05), arr.type = "triangle", arr.length = 0.2)

plot(ssizes, bias.x2.onestep, pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(0,0.061), ylab = "Bias")
lines(ssizes, bias.x2.twostep, lty = 2, col = "black")
points(ssizes, bias.x2.twostep, pch = 20, col = "black")
lines(ssizes, bias.x2.naive, lty = 4, col = "black")
points(ssizes, bias.x2.naive, pch = 20, col = "black")
abline(h = 0.0, lty = 3, col = "black")

plot(ssizes, log(rmse.x2.onestep + 0.01), pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(-4.7,1.1), ylab = "", yaxt = "n")
axis(2, at = c(-4.61, -2.81, -1.35, 0.01), labels = c("0", "0.05", "0.25", "1"))
lines(ssizes, log(rmse.x2.twostep + 0.01), col = "black", lty = 2)
points(ssizes, log(rmse.x2.twostep + 0.01), col = "black", pch = 20)
lines(ssizes, log(rmse.x2.naive + 0.01), col = "black", lty = 4)
points(ssizes, log(rmse.x2.naive + 0.01), col = "black", pch = 20)
abline(h = -4.61, lty = 3, col = "black")

plot(ssizes, bias.y0.onestep, pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(0,0.061), ylab = "Bias")
lines(ssizes, bias.y0.twostep, lty = 2, col = "black")
points(ssizes, bias.y0.twostep, pch = 20, col = "black")
lines(ssizes, bias.y0.naive, lty = 4, col = "black")
points(ssizes, bias.y0.naive, pch = 20, col = "black")
abline(h = 0.0, lty = 3, col = "black")

plot(ssizes, log(rmse.y0.onestep + 0.01), pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(-4.7,1.1), ylab = "", yaxt = "n")
axis(2, at = c(-4.61, -2.81, -1.35, 0.01), labels = c("0", "0.05", "0.25", "1"))
lines(ssizes, log(rmse.y0.twostep + 0.01), col = "black", lty = 2)
points(ssizes, log(rmse.y0.twostep + 0.01), col = "black", pch = 20)
lines(ssizes, log(rmse.y0.naive + 0.01), col = "black", lty = 4)
points(ssizes, log(rmse.y0.naive + 0.01), col = "black", pch = 20)
abline(h = -4.61, lty = 3, col = "black")

plot(ssizes, bias.z.onestep, pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(0,0.061), ylab = "Bias")
lines(ssizes, bias.z.twostep, lty = 2, col = "black")
points(ssizes, bias.z.twostep, pch = 20, col = "black")
lines(ssizes, bias.z.naive, lty = 4, col = "black")
points(ssizes, bias.z.naive, pch = 20, col = "black")
abline(h = 0.0, lty = 3, col = "black")

plot(ssizes, log(rmse.z.onestep + .01), pch = 20, type = "b", xlab = "Sample Size", main = "", ylim = c(-4.7,1.1), ylab = "", yaxt = "n")
axis(2, at = c(-4.61, -2.81, -1.35, 0.01), labels = c("0", "0.05", "0.25", "1"))
lines(ssizes, log(rmse.z.twostep + .01), col = "black", lty = 2)
points(ssizes, log(rmse.z.twostep + .01), col = "black", pch = 20)
lines(ssizes, log(rmse.z.naive + .01), col = "black", lty = 4)
points(ssizes, log(rmse.z.naive + .01), col = "black", pch = 20)
abline(h = -4.61, lty = 3, col = "black")

mtext("Intercept", side = 2, outer = TRUE, at = 0.86, cex = 1, padj = -2.2)
mtext("Covariate", side = 2, outer = TRUE, at = 0.62, cex = 1, padj = -2.2)
mtext("Control Items", side = 2, outer = TRUE, at = 0.38, cex = 1, padj = -2.2)
mtext("Sensitive Item", side = 2, outer = TRUE, at = 0.12, cex = 1, padj = -2.2)

mtext(text = "Absolute Median Deviation", side = 3, outer = TRUE, at = 0.27, cex = 1)
mtext(text = "Root Mean Square Error", side = 3, outer = TRUE, at = 0.73, cex = 1)

mtext("Sample Size", side = 1, outer = TRUE, line = 0.5, at = 0.27, cex = 0.75, padj = 1)
mtext("Sample Size", side = 1, outer = TRUE, line = 0.5, at = 0.73, cex = 0.75, padj = 1)
